from __future__ import unicode_literals
import requests
from bs4 import BeautifulSoup
import time
import json
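
# Crawl hot-gif.com tag pages and save the collected gif/mp4 URLs
# for each tag into a per-tag JSON file (hot_gif_<tag>.json).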

base_url = 'http://hot-gif.com/tag/'
tags = ['asian', 'amazing', 'ass', 'pussy', 'teen', 'dildo']


for tag in tags:

    print(tag)
    url = base_url + tag
    file_name = 'hot_gif_' + tag + '.json'


    img_url_list = []  # list of [src, 'image' | 'video'] pairs
    count = 0
    has_next = True

    # Follow the tag's pages until there is no "next" link
    while has_next:

        try:
            
            r = requests.get(url, timeout=5)
            soup = BeautifulSoup(r.content, 'lxml')

            # Find the link to the next page; stop after this page if there is none
            a_next = soup.find('a', attrs={'id': 'next'})
            if a_next is None:
                has_next = False
            else:
                url = a_next['href']

            # Collect gif image URLs
            img_set = soup.find_all('img', attrs={'class': 'photo gif'})
            # Collect mp4 video URLs
            video_set = soup.find_all('source', attrs={'type': 'video/mp4'})

            
            for img in img_set:
                img_url_list.append([img['src'], 'image'])
                count += 1

            for video in video_set:
                img_url_list.append([video['src'], 'video'])
                count += 1

            # Checkpoint: rewrite everything collected so far every ~200 new items
            if count > 200:
                print('start write to disk...' + str(count))
                with open(file_name, mode='w+', encoding='utf-8') as file:
                    file.write(json.dumps(img_url_list, ensure_ascii=False))
                count = 0

        except Exception as e:
            print(e)
            print(url)
        finally:
            # Throttle requests and show a running item count on one line
            time.sleep(2)
            print(str(count), end='\r')

    print('Exited at url: ' + url)
    print('start write to disk...' + str(count))
    with open(file_name, mode='w+', encoding='utf-8') as file:
        file.write(json.dumps(img_url_list, ensure_ascii=False))
